## [1] "WP3_D12_1" "WP3_D12_2" "WP3_D16_1" "WP3_D16_2" "PKD_10_D12_1"
## [6] "PKD_10_D12_2" "PKD_10_D16_1" "PKD_10_D16_2" "PKD_11_D12_1" "PKD_11_D12_2"
## [11] "PKD_11_D16_1" "PKD_11_D16_2"
## # A tibble: 12 × 4
## sample_id group day rep
## <chr> <chr> <chr> <chr>
## 1 WP3_D12_1 WP3 D12 1
## 2 WP3_D12_2 WP3 D12 2
## 3 WP3_D16_1 WP3 D16 1
## 4 WP3_D16_2 WP3 D16 2
## 5 PKD_10_D12_1 PKD_10 D12 1
## 6 PKD_10_D12_2 PKD_10 D12 2
## 7 PKD_10_D16_1 PKD_10 D16 1
## 8 PKD_10_D16_2 PKD_10 D16 2
## 9 PKD_11_D12_1 PKD_11 D12 1
## 10 PKD_11_D12_2 PKD_11 D12 2
## 11 PKD_11_D16_1 PKD_11 D16 1
## 12 PKD_11_D16_2 PKD_11 D16 2
## # A tibble: 12 × 5
## sample_id group day rep condition
## <chr> <chr> <chr> <chr> <chr>
## 1 WP3_D12_1 WP3 D12 1 WT
## 2 WP3_D12_2 WP3 D12 2 WT
## 3 WP3_D16_1 WP3 D16 1 WT
## 4 WP3_D16_2 WP3 D16 2 WT
## 5 PKD_10_D12_1 PKD_10 D12 1 KO
## 6 PKD_10_D12_2 PKD_10 D12 2 KO
## 7 PKD_10_D16_1 PKD_10 D16 1 KO
## 8 PKD_10_D16_2 PKD_10 D16 2 KO
## 9 PKD_11_D12_1 PKD_11 D12 1 KO
## 10 PKD_11_D12_2 PKD_11 D12 2 KO
## 11 PKD_11_D16_1 PKD_11 D16 1 KO
## 12 PKD_11_D16_2 PKD_11 D16 2 KO
##
## PKD_10 PKD_11 WP3
## 4 4 4
var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100
x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")
pca_df$group <- factor(pca_df$group,
levels = c("WP3", "PKD_10", "PKD_11")) # WP3 = WT
ggplot(pca_df, aes(x = PC1, y = PC2, color = group, shape = day)) +
geom_point(size = 7, alpha = 0.75) +
scale_color_manual(
values = c(
"WP3" = "#E0BA3E", # WT first
"PKD_10" = "#529DDA",
"PKD_11" = "#7570b3"
),
name = "Experimental Group",
labels = c("WT", "PKD10", "PKD11"),
guide = guide_legend(order = 1)
)+
scale_shape_manual(
values = c("D12" = 16, "D16" = 17),
name = "Timepoint",
labels = c("Day 12", "Day 16"),
guide = guide_legend(order = 2)
) +
theme_minimal(base_size = 14, base_family = "Helvetica Neue Light")+
theme(
legend.position = "right", # move legend below plot
legend.box = "vertical", # horizontal layout
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"), # center title
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 11),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
panel.grid.minor = element_line(color = "grey90", linewidth = 0.2),
) var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100
x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")
pca_df$condition <- factor(pca_df$condition,
levels = c("WT", "KO"))
ggplot(pca_df, aes(x = PC1, y = PC2, color = condition, shape = day)) +
geom_point(size = 7, alpha = 0.75) +
scale_color_manual(
values = c( "WT" = "#E0BA3E",
"KO" = "#529DDA"),
name = "Condition",
labels = c("WT", "PKD")
) +
scale_shape_manual(
values = c("D12" = 16, "D16" = 17),
name = "Timepoint",
labels = c("Day 12", "Day 16")
) +
guides(
color = guide_legend(order = 1),
shape = guide_legend(order = 2)
) +
theme_minimal(base_size = 14, base_family = "Helvetica Neue Light") +
theme(
legend.position = "right",
legend.box = "vertical",
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 11),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
panel.grid.minor = element_line(color = "grey90", linewidth = 0.2)
) pc3_lab <- paste0("PC3 (", round(var_explained[3], 1), "%)")
ggplot(pca_df, aes(x = PC1, y = PC2,
color = condition,
shape = day,
size = PC3)) +
geom_point(alpha = 0.75) +
scale_color_manual(
values = c("WT" = "#E0BA3E", "KO" = "#529DDA"),
name = "Condition",
labels = c("WT", "PKD")
) +
scale_shape_manual(
values = c("D12" = 16, "D16" = 17),
name = "Timepoint",
labels = c("Day 12", "Day 16")
) +
scale_size_continuous(name = pc3_lab) + # PC3 % here
guides(
color = guide_legend(order = 1),
shape = guide_legend(order = 2),
size = guide_legend(order = 3)
) +
theme_minimal(base_size = 14, base_family = "Helvetica Neue Light") +
theme(
legend.position = "right",
legend.box = "vertical",
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 11),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
panel.grid.minor = element_line(color = "grey90", linewidth = 0.2)
) +
labs(
title = "PCA of RNA-seq samples",
x = x_lab,
y = y_lab
)var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100
x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")
z_lab <- paste0("PC3 (", round(var_explained[3], 1), "%)")
scatterplot3d(
x = pca_df$PC1,
y = pca_df$PC2,
z = pca_df$PC3,
color = ifelse(pca_df$condition == "WT", "#E0BA3E", "#529DDA"),
pch = ifelse(pca_df$day == "D12", 16, 17),
xlab = x_lab,
ylab = y_lab,
zlab = z_lab,
main = "3D PCA of RNA-seq samples"
)## Upregulated genes: 1006
## Downregulated genes: 1363
## Not significant: 16848
## Upregulated genes: 1398
## Downregulated genes: 989
## Not significant: 16830
top_genes <- rownames(sig_deg[order(sig_deg$adj.P.Val), ])[1:50]
mat_top <- log_mat[top_genes, ]
annotation_col <- data.frame(
Condition = sample_df$condition,
Day = sample_df$day
)
rownames(annotation_col) <- sample_df$sample_id
ann_colors <- list(
Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA"),
Day = c(D12 = "#b3b3b3", D16 = "#4d4d4d")
)
my_colors <- colorRampPalette(c("blue", "white", "red"))(100)
mat_scaled <- t(scale(t(mat_top)))
library(pheatmap)
pheatmap(
mat_scaled,
color = my_colors,
cluster_rows = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = ann_colors,
fontsize = 10,
fontsize_row = 8,
border_color = NA,
scale = "none",
main = "Top 50 Differentially Expressed Genes (PKD vs Healthy)",
clustering_method = "ward.D2",
show_rownames = TRUE,
show_colnames = FALSE,
labels_row = rownames(mat_scaled),
labels_col = colnames(mat_scaled),
fontfamily = "Helvetica Neue Light"
)# --- Gene list for Day 12 volcano highlights ---
genes_D12 <- c(
"CHCHD2", "MKI67", "CDC25C", "CDC20", "IL17C", "COL22A1",
"CYP2C9", "CCL4", "COL12A1", "CBR1"
)
# --- Normalize case to ensure matching ---
rownames(log_mat) <- toupper(rownames(log_mat))
genes_upper <- toupper(genes_D12)
# --- Subset only genes that exist ---
genes_found <- genes_upper[genes_upper %in% rownames(log_mat)]
cat("✅ Genes found:", paste(genes_found, collapse = ", "), "\n")## ✅ Genes found: CHCHD2, MKI67, CDC25C, CDC20, IL17C, COL22A1, CYP2C9, CCL4, COL12A1, CBR1
# --- Subset metadata to only Day 12 samples ---
sample_df_d12 <- sample_df %>% dplyr::filter(day == "D12")
sample_ids_d12 <- sample_df_d12$sample_id
# --- Subset expression matrix to Day 12 samples and selected genes ---
mat_subset_d12 <- log_mat[genes_found, sample_ids_d12, drop = FALSE]
# --- Scale by gene (z-score) ---
mat_scaled_d12 <- t(scale(t(mat_subset_d12)))
# --- Build annotation table for columns (samples) ---
annotation_col <- data.frame(
Condition = sample_df_d12$condition
)
rownames(annotation_col) <- sample_df_d12$sample_id
# --- Define colors ---
ann_colors <- list(
Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA")
)
# --- Draw the heatmap ---
p_d12 <- pheatmap(
mat_scaled_d12,
color = colorRampPalette(c("blue", "white", "red"))(100),
cluster_rows = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = ann_colors,
fontsize_row = 10,
fontsize_col = 10,
border_color = NA,
scale = "none",
main = "Day 12: PKD vs Healthy",
clustering_method = "ward.D2",
show_rownames = TRUE,
show_colnames = FALSE,
labels_row = rownames(mat_scaled_d12),
labels_col = colnames(mat_scaled_d12),
fontfamily = "Helvetica Neue Light"
)# --- Gene list for Day 16 volcano highlights ---
genes_D16 <- c(
"CHCHD2", "GJA1", "KCNE3", "SLC26A9", "KCNJ2", "CXCL14", "AQP2", "CCL28",
"COL12A1", "CBR1", "KCNH2", "SLC12A3", "COL1A2", "COL22A1", "NID2", "CCL4"
)
# --- Normalize case to ensure matching ---
rownames(log_mat) <- toupper(rownames(log_mat))
genes_upper <- toupper(genes_D16)
# --- Subset only genes that exist ---
genes_found <- genes_upper[genes_upper %in% rownames(log_mat)]
cat("✅ Genes found:", paste(genes_found, collapse = ", "), "\n")## ✅ Genes found: CHCHD2, GJA1, KCNE3, SLC26A9, KCNJ2, CXCL14, AQP2, CCL28, COL12A1, CBR1, KCNH2, SLC12A3, COL1A2, COL22A1, NID2, CCL4
# --- Subset metadata to only Day 16 samples ---
sample_df_d16 <- sample_df %>% dplyr::filter(day == "D16")
sample_ids_d16 <- sample_df_d16$sample_id
# --- Subset expression matrix to Day 16 samples and selected genes ---
mat_subset_d16 <- log_mat[genes_found, sample_ids_d16, drop = FALSE]
# --- Scale by gene (z-score) ---
mat_scaled_d16 <- t(scale(t(mat_subset_d16)))
# --- Build annotation table for columns (samples) ---
annotation_col <- data.frame(
Condition = sample_df_d16$condition
)
rownames(annotation_col) <- sample_df_d16$sample_id
# --- Define colors ---
ann_colors <- list(
Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA")
)
# --- Draw the heatmap ---
pheatmap(
mat_scaled_d16,
color = colorRampPalette(c("blue", "white", "red"))(100),
cluster_rows = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = ann_colors,
fontsize_row = 10,
fontsize_col = 10,
border_color = NA,
scale = "none",
main = "Day 16: PKD vs Healthy",
clustering_method = "ward.D2",
show_rownames = TRUE,
show_colnames = FALSE,
labels_row = rownames(mat_scaled_d16),
labels_col = colnames(mat_scaled_d16),
fontfamily = "Helvetica Neue Light"
)# Simplify condition (merge PKD_10 and PKD_11 into PKD)
sample_df <- sample_df %>%
mutate(
condition = case_when(
group == "WP3" ~ "Healthy",
group %in% c("PKD_10", "PKD_11") ~ "PKD"
),
condition_day = paste0(condition, "_", day)
)
# Define the order you want to appear in the heatmap
sample_df <- sample_df %>%
mutate(condition_day = factor(condition_day,
levels = c("Healthy_D12", "Healthy_D16",
"PKD_D12", "PKD_D16")))
# Now reorder your metadata according to condition_day and replicate within day
sample_df <- sample_df %>%
arrange(condition_day, condition, day, rep)
# Reorder your scaled expression matrix columns to match sample_df order
mat_scaled <- mat_scaled[, sample_df$sample_id, drop = FALSE]
# Confirm order
colnames(mat_scaled)## [1] "WP3_D12_1" "WP3_D12_2" "WP3_D16_1" "WP3_D16_2" "PKD_10_D12_1"
## [6] "PKD_11_D12_1" "PKD_10_D12_2" "PKD_11_D12_2" "PKD_10_D16_1" "PKD_11_D16_1"
## [11] "PKD_10_D16_2" "PKD_11_D16_2"
annotation_col <- data.frame(
Condition = sample_df$condition,
Day = sample_df$day
)
rownames(annotation_col) <- sample_df$sample_id
# Reorder annotation table to match the matrix
annotation_col <- annotation_col[colnames(mat_scaled), ]
ann_colors <- list(
Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA"),
Day = c(D12 = "#b3b3b3", D16 = "#4d4d4d")
)
library(pheatmap)
pheatmap(
mat_scaled,
color = colorRampPalette(c("blue", "white", "red"))(100),
cluster_rows = TRUE,
cluster_cols = FALSE, # keep your custom column order
annotation_col = annotation_col,
annotation_colors = ann_colors,
fontsize = 10,
border_color = NA,
main = "Top 50 DEGs (Healthy vs PKD, by Day)",
show_rownames = TRUE,
show_colnames = FALSE,
)